########################################################
# DESANTE (2013) SURVEY EXPERIMENT WELFARE ALLOCATION 

rm(list = ls())

# print date and time of run
print("Starting date and time")
Sys.time()

# load packages and functions
R_folder <- "INSERT HERE"
source(paste0(R_folder, "survey_functions.R"))

# images directory
images_directory <- "INSERT HERE"

# grayscale for the graphics or not
# print_colormodel <- "srgb" # in color
print_colormodel <- "grey"

# load data
data_folder <- "INSERT HERE"
wm <- read.csv(paste0(data_folder, "desante_welfare.csv"))
print("Complete: loaded data")
Sys.time()

# generate the allocation of welfare to each applicant
wm$app1 <- rowSums(wm[,c("EBLB_1","EPLE_1","EELP_1",
                         "LBLB_1","LELP_1","LPLE_1")],
                   na.rm=T)
wm$app2 <- rowSums(wm[,c("EBLB_2","EPLE_2","EELP_2",
                         "LBLB_2","LELP_2","LPLE_2")],
                   na.rm=T)
emily1 <- c()
emily2 <- c()
emily3 <- c()
emily1[wm$EBLB_1!="NA"] <- 1
emily2[wm$EPLE_1!="NA"] <- 1
emily3[wm$EELP_1!="NA"] <- 1
emily1[is.na(emily1)] <- 0
emily2[is.na(emily2)] <- 0
emily3[is.na(emily3)] <- 0
wm$emily <- emily1+emily2+emily3 # Dummy for Emily 
latoya1 <- c()
latoya2 <- c()
latoya3 <- c()
latoya1[wm$LBLB_1!="NA"] <- 1 
latoya2[wm$LPLE_1!="NA"] <- 1
latoya3[wm$LELP_1!="NA"] <- 1
latoya1[is.na(latoya1)] <- 0
latoya2[is.na(latoya2)] <- 0
latoya3[is.na(latoya3)] <- 0
wm$latoya <- latoya1+latoya2+latoya3 # Dummy for Latoya
wm$blank <- ifelse(emily1==1 | latoya1==1, 1, 0) # Blank
wm$app1p <- ifelse(emily2==1 | latoya2==1, 1, 0) # Applicant 1 Poor
wm$app1e <- ifelse(emily3==1 | latoya3==1, 1, 0) # Applicant 1 Excellent
# dummy for controlling for work quality
wm$workq <- ifelse(wm$blank==1, 0, 1)
# blocks for the types
wm$blocks[wm$blank==1] <- 1
wm$blocks[wm$app1p==1] <- 2
wm$blocks[wm$app1e==1] <- 3
# create each of the placebo test variables
wm$pedu <- ifelse(rowSums(wm[,c("emiedu", "latedu")],
                   na.rm=T)>0,rowSums(wm[,c("emiedu", "latedu")],
                                      na.rm=T),NA)
wm$pwork <- ifelse(rowSums(wm[,c("emiwork", "latwork")],
                           na.rm=T)>0,rowSums(wm[,c("emiwork", "latwork")],
                                              na.rm=T),NA)
wm$pcrime <- ifelse(rowSums(wm[,c("emicrime", "latcrime")],
                            na.rm=T)>0,rowSums(wm[,c("emicrime", "latcrime")],
                                               na.rm=T),NA)
wm$pses <- ifelse(rowSums(wm[,c("emises", "latses")],
                          na.rm=T)>0,rowSums(wm[,c("emises", "latses")],
                                             na.rm=T),NA)
wm$pparent <- ifelse(rowSums(wm[,c("emiparent", "latparent")],
                             na.rm=T)>0,rowSums(wm[,c("emiparent", "latparent")],
                                                na.rm=T),NA)
wm$pchild <- ifelse(rowSums(wm[,c("emichild", "latchild")],
                            na.rm=T)>0,rowSums(wm[,c("emichild", "latchild")],
                                               na.rm=T),NA)
# construct the difference variables
wm$dedu <- wm$pedu-wm$lauedu
wm$dwork <- wm$pwork-wm$lauwork
wm$dcrime <- wm$pcrime-wm$laucrime
wm$dses <- wm$pses-wm$lauses
wm$dparent <- wm$pparent-wm$lauparent
wm$dchild <- wm$pchild-wm$lauchild

# plot density graphs
rename_values <- function(variable, old_names, new_names) {
  for (i in 1:length(new_names)) {
    variable[variable == old_names[i]] <- new_names[i]
  }
  return(variable)
}

plot_data <- dplyr::select(wm, latoya, workq, dedu, dwork, dcrime, dses, dparent, dchild)
plot_data$dedu <- -1*plot_data$dedu
plot_data$dwork <- -1*plot_data$dwork
plot_data$dparent <- -1*plot_data$dparent
plot_data <- melt(plot_data, measure.vars = c("dedu", "dwork", "dcrime", "dses", "dparent", "dchild"))

plot_data$variable <- as.character(plot_data$variable)
new_names <- c("Having Not Completed\nHigh School",
               "Having Not Worked in\nPrevious Year",
               "Having a Criminal\nConviction",
               "Having Grown Up in\nLow SES Family",
               "Not Having Good\nParenting Skills",
               "Having Another Child\nin Next 2 Years")
plot_data$variable <- rename_values(variable = plot_data$variable, 
                                    old_names = unique(plot_data$variable),
                                    new_names = new_names)
plot_data$variable <- factor(plot_data$variable, levels = new_names[c(5,3,2,6,1,4)])
plot_data$workq <- factor(plot_data$workq, labels = c("Basic", "Covariate Control"))
plot_data$latoya <- factor(plot_data$latoya, labels = c("Emily - Laurie", "Latoya - Laurie"))

get_density <- function(input_data, input_subset) {
  input_subset <- as.character(unlist(input_subset))
  input_data <- input_data[as.character(input_data$latoya) == input_subset[1] &
                             as.character(input_data$workq) == input_subset[2] &
                             as.character(input_data$variable) == input_subset[3],]
  xhist <- binning(input_data["value"], breaks=-4.0, bw=1)
  den <- bde(x=xhist, type="smkde", from=-4.5, to=4.5) 
  temp <- data.frame(latoya = as.character(unique(input_data$latoya)), 
                     workq = as.character(unique(input_data$workq)),
                     variable = as.character(unique(input_data$variable)),
                     x = den$x, y = den$y)
  return(temp)
}

all_combo <- expand.grid(latoya = unique(plot_data$latoya), 
                         workq = unique(plot_data$workq),
                         variable = unique(plot_data$variable))

d_res <- get_density(input_data = plot_data, input_subset = all_combo[1,])
for (i in 2:nrow(all_combo)) {
  d_res <- rbind(d_res, get_density(input_data = plot_data, 
                                    input_subset = all_combo[i,]))
}

# add in means
summary_data <- plot_data %>% group_by(latoya, workq, variable) %>% 
  dplyr::summarise(mean_value = mean(value, na.rm = TRUE),
                   var = var(value, na.rm = TRUE)) %>% as.data.frame()
d_res <- merge(x = d_res, y = summary_data, all.x = TRUE)

d_res$latoya <- factor(d_res$latoya, levels = levels(plot_data$latoya))
d_res$variable <- factor(d_res$variable, levels = levels(plot_data$variable))
d_res$workq <- factor(d_res$workq, levels = levels(plot_data$workq))

# Make density plot
ggplot(d_res, 
       aes(x = x, y = y, fill = latoya)) + 
  geom_area(position = "identity", alpha = 0.75, color = "black") + 
  xlim(-4.5, 4) + facet_grid(variable ~workq) +
  theme_bb() + ylab("Density") + 
  geom_vline(aes(xintercept = mean_value, color = latoya), 
             linetype="longdash", size=1) +
  xlab("Likelihood Paired Difference (Applicant 1 - Applicant 2)") +
  scale_color_manual(name="Treatment Assignment", 
                     values=c("orange3", "steelblue4")) + 
  scale_fill_manual(name="Treatment Assignment", 
                    values=c("orange1", "steelblue4"))
ggsave(paste0(images_directory, "Figure_39.pdf"), 
       width = 6.5, height = 11, dpi = 300, colormodel = print_colormodel)  
print("Complete: density plot")
Sys.time()

# Regression outputs
placebo_names <- c("Likelihood of Having Not Completed High School",
                   "Likelihood of Having Not Worked in Previous Year",
                   "Likelihood of Having a Criminal Conviction",
                   "Likelihood of Having Grown Up in Low SES Family",
                   "Likelihood of Not Having Good Parenting Skills",
                   "Likelihood of Having Another Child in Next 2 Years")
std_res <- as.data.frame(matrix(NA, 12, 2))
for (j in 0:1){
  std_res[j*6+1,] <- robustse(lm(scale(-1*dedu,center=T,scale=T)~latoya, 
                            data=wm[wm$workq==j,]))[2,1:2]
  std_res[j*6+2,] <- robustse(lm(scale(-1*dwork,center=T,scale=T)~latoya, 
                             data=wm[wm$workq==j,]))[2,1:2]
  std_res[j*6+3,] <- robustse(lm(scale(dcrime,center=T,scale=T)~latoya, 
                             data=wm[wm$workq==j,]))[2,1:2]
  std_res[j*6+4,] <- robustse(lm(scale(dses,center=T,scale=T)~latoya, 
                            data=wm[wm$workq==j,]))[2,1:2]
  std_res[j*6+5,] <- robustse(lm(scale(-1*dparent,center=T,scale=T)~latoya, 
                              data=wm[wm$workq==j,]))[2,1:2]
  std_res[j*6+6,] <- robustse(lm(scale(dchild,center=T,scale=T)~latoya, 
                              data=wm[wm$workq==j,]))[2,1:2]
}
std_res$placebo <- rep(placebo_names, 2)
names(std_res)[1:2] <- c("mean","se")
std_res$placebo <- factor(as.character(std_res$placebo), 
                          levels=rev(unique(placebo_names)[c(4,1,6,2,3,5)]))
std_res$v <- factor(c(rep("Basic", 6), rep("Covariate Control", 6)))
std_res$v2 <- factor(std_res$v, levels = rev(levels(std_res$v)))
# Make coefficient plot 
ggplot(std_res, 
            aes(x=mean,y=v2,shape=v,color=v)) + 
  geom_vline(xintercept=0, linetype="longdash")+
  geom_errorbarh(aes(xmax =  mean + qnorm(1-0.005)*se, 
                     xmin = mean + qnorm(0.005)*se),
                 size=0.6, height = 0) +
  geom_errorbarh(aes(xmax =  mean + qnorm(1-0.025)*se, 
                     xmin = mean + qnorm(0.025)*se),
                 size=1.0, height=0) +
  geom_point(stat="identity",size=3.5,fill="white")+
  scale_color_manual(name="Vignette Type",
                     values=c("firebrick3","forestgreen"))+
  scale_shape_manual(name="Vignette Type",values=c(21,22)) + 
  facet_wrap( ~ placebo, ncol=2)+
  theme_bb()+
  xlab("Standardized Paired Difference [(Latoya-Laurie)-(Emily-Laurie)]")+
  ylab("") + scale_y_discrete(breaks=NULL)
ggsave(paste0(images_directory, "Figure_06.pdf"), height=4.5, width=7.25,
       dpi = 300, colormodel = print_colormodel)
print("Complete: standardized outcome coefficient plot")
Sys.time()

# non-standardized analysis
nonstd_res <- as.data.frame(matrix(NA, 12, 2))
for (j in 0:1){
  nonstd_res[j*6+1,] <- robustse(lm(-1*dedu~latoya, 
                                    data=wm[wm$workq==j,]))[2,1:2]
  nonstd_res[j*6+2,] <- robustse(lm(-1*dwork~latoya, 
                                    data=wm[wm$workq==j,]))[2,1:2]
  nonstd_res[j*6+3,] <- robustse(lm(dcrime~latoya, 
                                    data=wm[wm$workq==j,]))[2,1:2]
  nonstd_res[j*6+4,] <- robustse(lm(dses~latoya, 
                                    data=wm[wm$workq==j,]))[2,1:2]
  nonstd_res[j*6+5,] <- robustse(lm(-1*dparent~latoya, 
                                    data=wm[wm$workq==j,]))[2,1:2]
  nonstd_res[j*6+6,] <- robustse(lm(dchild~latoya, 
                                    data=wm[wm$workq==j,]))[2,1:2]
}
nonstd_res$placebo <- rep(placebo_names, 2)
names(nonstd_res)[1:2] <- c("mean","se")
nonstd_res$placebo <- factor(as.character(nonstd_res$placebo), 
                             levels=rev(unique(placebo_names)[c(4,1,6,2,3,5)]))
nonstd_res$v <- factor(c(rep("Basic", 6), rep("Covariate Control", 6)))
nonstd_res$v2 <- factor(nonstd_res$v, levels = rev(levels(nonstd_res$v)))
ggplot(nonstd_res, 
            aes(x=mean,y=v2,shape=v,color=v))+
  geom_vline(xintercept=0, linetype="longdash")+
  geom_errorbarh(aes(xmax =  mean + 2.576*se, 
                     xmin = mean - 2.576*se),
                 size=0.6, height=0) +
  geom_errorbarh(aes(xmax =  mean + 1.96*se, 
                     xmin = mean - 1.96*se),
                 size=1.0, height=0)+
  geom_point(stat="identity",size=3.5,fill="white")+
  scale_color_manual(name="Vingette Type",
                     values=c("firebrick3","forestgreen"))+
  scale_shape_manual(name="Vingette Type",values=c(21,22)) +
  facet_wrap( ~ placebo, ncol=2)+theme_bbtop()+
  xlab("Non-standardized Paired Difference [(Latoya-Laurie)-(Emily-Laurie)]")+
  ylab("")+scale_y_discrete(breaks=NULL)
ggsave(paste0(images_directory, "Figure_40.pdf"), height=4.5, width=7.25,
       dpi = 300, colormodel = print_colormodel)

